gusucode.com > 语音分析源码程序 > 语音分析源码程序/HTK MFCC MATLAB/mfcc/compare.m

    % COMPARE Comparison of selected MFCC feature extraction tools.
%
%   This script compares the cepstral features extracted using the 
%   included MFCC routine, against those extracted using HTK [1] 
%   and MELFCC [2] tools. HTK file format input/output is achieved 
%   using the included simple HTKREAD_LITE and HTKWRITE_LITE 
%   routines. Further functionality can be achieved by installing
%   the VOICEBOX toolbox [3]. 
%
%   This script can be run as is, without HTK [1] and MELFCC [2] tools
%   installed (as these are not included). In such case, features 
%   extracted using the above tools are supplied so that comparisons 
%   can be made. If HTK [1] and/or MELFCC [2] tools are installed, 
%   then their use can be enabled (see the comments in source code). 
%
%   Note that this work has been done and tested on Linux only.
%
%   References
%
%           [1] Young, S., Evermann, G., Gales, M., Hain, T., Kershaw, D., 
%               Liu, X., Moore, G., Odell, J., Ollason, D., Povey, D., 
%               Valtchev, V., Woodland, P., 2006. The HTK Book (for HTK 
%               Version 3.4.1). Engineering Department, Cambridge University.
%               (see also: http://htk.eng.cam.ac.uk)
%
%           [2] Ellis, D., Reproducing the feature outputs of common programs 
%               using Matlab and melfcc.m. Online resource, url: 
%               http://labrosa.ee.columbia.edu/matlab/rastamat/mfccs.html
%
%           [3] Brookes, M., VOICEBOX: Speech Processing Toolbox for MATLAB.
%               On-line resource, url: 
%               http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
%
%   See also MFCC, EXAMPLE, HTKREAD, HTKREAD_LITE, HTKWRITE, HTKWRITE_LITE.

%   Author: Kamil Wojcicki, September 2011


    %% PRELIMINARIES

    % clean-up MATLAB's environment
    clear all; close all; clc; % fprintf( '.\n' );


    % log everything to file for future reference
    diary( sprintf('%s.txt',mfilename) ); 


    % function handle for mean square error computation 
    MSE = @(x,y)(mean((x(:)-y(:)).^2));


    % function handle for newline display
    newline = @()( fprintf('\n') );


    % note: changing the following _two_ settings
    % will require additional recoding, i.e., 
    % these two fields do not automatically propagate
    % through the simple examples presented here.
    TARGETKIND = 'MFCC_0';  % HTK feature type 
    TYPECODE = 6+8192;      % HTK type code (see Sec. 5.10.1 of [1], pp. 80-81)

    Tw = 25;                % analysis frame duration (ms)
    Ts = 10;                % analysis frame shift (ms)
    alpha = 0.97;           % preemphasis coefficient
    M = 20;                 % number of filterbank channels 
    C = 12;                 % number of cepstral coefficients 
    L = 22;                 % cepstral sine lifter parameter
    LF = 300;               % lower frequency limit (Hz)
    HF = 3700;              % upper frequency limit (Hz)
    wav_file = 'sp10.wav';  % input audio filename

    % get the base part (without extension) of the audio filename
    [ basename, basename ] = fileparts( wav_file );

    % read speech samples, sampling rate and precision from file
    [ speech, fs, nbits ] = wavread( wav_file );


    %% HTK'S [1] HCOPY TOOL

    % specify HTK feature filename
    htk.feature_file = sprintf( '%s_htk.mfc', basename );

    % enable the following if you have HTK tools installed
    % and want to run HCopy feature extraction
    if false

        % generate HTK configuration
        htk.config = {};
        htk.config{end+1} = sprintf( '# MATLAB generated HTK config' );
        htk.config{end+1} = sprintf( 'SOURCEFORMAT = WAV' );
        htk.config{end+1} = sprintf( 'TARGETKIND = %s', TARGETKIND );
        htk.config{end+1} = sprintf( 'WINDOWSIZE = %0.1f', Tw*1E4 ); % in 100 ns units
        htk.config{end+1} = sprintf( 'TARGETRATE = %0.1f', Ts*1E4 ); % in 100 ns units
        htk.config{end+1} = sprintf( 'PREEMCOEF = %0.2f', alpha );
        htk.config{end+1} = sprintf( 'NUMCHANS = %d', M );
        htk.config{end+1} = sprintf( 'CEPLIFTER = %d', L );
        htk.config{end+1} = sprintf( 'NUMCEPS = %d', C );
        htk.config{end+1} = sprintf( 'LOFREQ = %d', LF );
        htk.config{end+1} = sprintf( 'HIFREQ = %d', HF );
        htk.config{end+1} = sprintf( 'USEHAMMING = T' );
        htk.config{end+1} = sprintf( 'SAVEWITHCRC = F');
    
        % specify HTK configuration file
        htk.config_file = 'htkmfcc.conf';

        % write HTK config to file
        cell2file( htk.config, htk.config_file );

        % run HTK's HCopy feature extraction
        eval( sprintf('! HCopy -A -D -V -T 1 -C %s %s %s', ...
                             htk.config_file, wav_file, htk.feature_file) ); % requires HTK [1] (not included)

    end

    % read the HCopy extracted features and rearrange them, since in HTK [1]
    % the 0th cepstral coefficients are stored at the end of each vector
    % where as here the 0th cepstral coefficient comes at the start

    % read HTK features from file
    % htk.mfcc = readhtk( htk.feature_file ); % requires VOICEBOX [3] (not included)
    htk.mfcc = readhtk_lite( htk.feature_file ); % requires 'vanilla' implementation (included)

    % rearrange and make column vectors
    htk.mfcc = htk.mfcc(:, [end 1:end-1]).'; 


    %% MELFCC [2] TOOL BY DAN ELLIS

    % specify MELFCC feature filename
    ellis.feature_file = sprintf( '%s_melfcc.mfc', basename );

    % enable the following if you have Dan Ellis' rastamat tools [2] 
    % installed and want to run MELFCC based feature extraction
    if false
        % extract MFCCs using RASTAMAT tools [2] (not included)
        ellis.mfcc = 0.5*melfcc( speech, fs, 'wintime', Tw*1E-3, ...
            'hoptime', Ts*1E-3, 'preemph', 0.97, 'minfreq', LF, ...
            'maxfreq', HF, 'nbands', M, 'numcep', C+1, 'lifterexp', -L, ...
            'dcttype', 3, 'fbtype', 'htkmel', 'sumpower', 0 );

        % write MELFCC extracted features to file
        % writehtk( ellis.feature_file, ellis.mfcc([2:end 1],:).', Ts*1E-3, TYPECODE ); % requires VOICEBOX [3] (not included)
        writehtk_lite( ellis.feature_file, ellis.mfcc([2:end 1],:).', Ts*1E-3, TYPECODE ); % requires 'vanilla' implementation (included)
    else
        %ellis.mfcc = readhtk( ellis.feature_file ); % requires VOICEBOX [3] (not included)
        ellis.mfcc = readhtk_lite( ellis.feature_file ); % requires 'vanilla' implementation (included)

        % rearrange and make column vectors
        ellis.mfcc = ellis.mfcc(:, [end 1:end-1]).';
    end


    %% MFCC TOOL (THIS IMPLEMENTATION)

    % specify MFCC feature filename
    this.feature_file = sprintf( '%s_mfcc.mfc', basename );

    % extract using the included mfcc(...) function (this implementation)
    this.mfcc = mfcc( speech, fs, Tw, Ts, alpha, @hamming, [LF HF], M, C+1, L );

    % write MFCC extracted features to file
    % writehtk( this.feature_file, this.mfcc([2:end 1],:).', Ts*1E-3, TYPECODE ); % requires VOICEBOX [3] (not included)
    writehtk_lite( this.feature_file, this.mfcc([2:end 1],:).', Ts*1E-3, TYPECODE ); % requries 'vanilla' implementation (included)


    %% HTK'S HLIST COMPARISONS
    
    % enable the following if you have HTK [1] tools installed and want to run HList 
    % as a sanity check for features extracted using different tools compared here
    if false
        Nv = 3; % number of vectors to list

        % run HList feature display for HCopy extracted features
        eval( sprintf('! HList -A -T 1 -o -h -e %i -i %i %s', ...
                        Nv, C+1, htk.feature_file) ); newline(); % requires HTK [1] (not included)

        % run HList feature display for MELFCC extracted features
        eval( sprintf('! HList -A -T 1 -o -h -e %i -i %i %s', ...
                        Nv, C+1, ellis.feature_file) ); newline(); % requires HTK [1] (not included)

        % run HList feature display for MFCC extracted features
        eval( sprintf('! HList -A -T 1 -o -h -e %i -i %i %s', ...
                        Nv, C+1, this.feature_file) ); newline(); % requires HTK [1] (not included)
    end

    
    %% PLOT COMPARISONS

    % time vector (s) for frames or features
    time = [0:size(this.mfcc,2)-1]*Ts*1E-3+0.5*Tw*1E-3; 

    % feature plots for the three considered implementations
    figure('Position', [30 30 800 600], 'PaperPositionMode', 'auto', ... 
              'color', 'w', 'PaperOrientation', 'landscape', 'Visible', 'on' ); 

    subplot( 3,1,1 );    
    imagesc( time, [0:M-1], htk.mfcc ); axis( 'xy' );
    xlim( [ min(time) max(time) ] );
    xlabel( 'Time (s)' ); 
    ylabel( 'Cepstrum index' );
    title( 'Mel frequency cepstrum: HTK' );

    subplot( 3,1,2 );    
    imagesc( time, [0:M-1], ellis.mfcc ); axis( 'xy' );
    xlim( [ min(time) max(time) ] );
    xlabel( 'Time (s)' ); 
    ylabel( 'Cepstrum index' );
    title( 'Mel frequency cepstrum: MELFCC' );

    subplot( 3,1,3 );    
    imagesc( time, [0:M-1], this.mfcc ); axis( 'xy' );
    xlim( [ min(time) max(time) ] );
    xlabel( 'Time (s)' ); 
    ylabel( 'Cepstrum index' );
    title( 'Mel frequency cepstrum: THIS' );

    print('-dpdf', sprintf('%s.pdf', mfilename)); 
    print('-dpng', sprintf('%s.png', mfilename)); 


    %% MSE COMPARISONS

    % print mean square error values
    fprintf( '\nMFCC MSE(MELFCC,THIS) : %7.2f\n', MSE(ellis.mfcc,this.mfcc) );
    fprintf( 'MFCC MSE(HTK,MELFCC)  : %7.2f\n', MSE(htk.mfcc,ellis.mfcc) );
    fprintf( 'MFCC MSE(HTK,THIS)    : %7.2f\n\n', MSE(htk.mfcc,this.mfcc) );

    % print variance of HTK MFCCs
    fprintf( 'HTK MFCC variance     : %7.2f\n\n', var(htk.mfcc(:)) );


    % flush and close the log file
    diary( 'off' );


% EOF